# Draw one grouped bar chart per closed question, with one bar colour per bot.
# Uses closed_questions, df_unique and px, which are defined elsewhere in this file.
for q in closed_questions:
    # Answer counts per (bot, answer), flattened into columns 'bot', q and 'count'.
    df_group = (
        df_unique.groupby(['bot'])[q]
        .value_counts()
        .reset_index(name='count')
    )
    fig = px.bar(
        df_group,
        x=q,
        y="count",
        color="bot",
        title=q,
        barmode='group',
    )
    fig.show()
# !pip install plotly
import pandas as pd
import pickle
import datetime
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
# Read the survey export and show the non-null count of every column.
df = pd.read_csv("responses.csv")
df.count()
# 208 How much did you enjoy playing with Anya? 208 How annoyed did you feel with Anya? 208 How smart did you think Anya was? 208 Would you recommend this game to a friend?\n 208 Did you like Anya? 208 How frustrated did Anya make you feel? 208 Do you think Anya improved during the conversation? 208 Did the conversation get easier over time? 208 Did you feel like you were helping Anya with more clues? 208 What did you like most about Anya? 205 What would you change about Anya to make her more enjoyable to play with? 207 Anything else you would like to share? 202 What is your Gender? 208 What is your age group? 208 psid 206 Start Date (UTC) 208 Submit Date (UTC) 208 Network ID 208 dtype: int64
# Keep only the responses that carry a psid value.
df_psid = df.dropna(subset=["psid"])
df_psid.count()
# 206 How much did you enjoy playing with Anya? 206 How annoyed did you feel with Anya? 206 How smart did you think Anya was? 206 Would you recommend this game to a friend?\n 206 Did you like Anya? 206 How frustrated did Anya make you feel? 206 Do you think Anya improved during the conversation? 206 Did the conversation get easier over time? 206 Did you feel like you were helping Anya with more clues? 206 What did you like most about Anya? 203 What would you change about Anya to make her more enjoyable to play with? 205 Anything else you would like to share? 200 What is your Gender? 206 What is your age group? 206 psid 206 Start Date (UTC) 206 Submit Date (UTC) 206 Network ID 206 dtype: int64
SELECT COUNT("realId") FROM c
271
SELECT * FROM r where r.realId like "directlinespeech/conversations/%" SELECT COUNT("realId") FROM r where r.realId like "directlinespeech/conversations/%"
141
SELECT * FROM r where r.realId like "directlinespeech/conversations/%" and r._ts >{0} and r._ts <{1} SELECT * FROM r where r.realId like "directlinespeech/conversations/%" and r._ts >1631201400 and r._ts <1631295349
# df_bot_a = pd.read_csv("/Users/claudavila/Documents/Clau/3. Academic-Work/2.Laboral/1.AI/Sociate.ai/Demo/demo-anya/sociate-data-anya/conversations_bot_a.csv")
# df_bot_a.head()
# len(df_bot_a)
# df_bot_a['psid_realid'] = df_bot_a[df_bot_a['realId'].str.contains("3pI")]['realId'].str.slice(start=23)
# Load the pickled bot-A conversation objects.
# NOTE(review): pickle.load can execute code embedded in the file; only load
# pickles that come from a trusted export.
# NOTE(review): the absolute, user-specific path makes this step non-portable.
with open('/Users/claudavila/Documents/Clau/3. Academic-Work/2.Laboral/1.AI/Sociate.ai/Demo/demo-anya/sociate-data-anya/conversations_bot_a.pickle', 'rb') as handle:
    conversations_obj_bot_a = pickle.load(handle)
len(conversations_obj_bot_a)
# Recorded output: 713
# Collect, for every bot-A conversation, the id stored under
# document -> DialogState -> dialogStack -> $values[0] -> state -> foreach -> value -> id.
# Conversations where that path cannot be followed get the integer sentinel 0.
list_a_psids = []
print(list_a_psids)
# Recorded output: []
conversations_obj_bot_a[457]['document']['DialogState']['dialogStack']['$values'][0]['state']['foreach']['value']['id']
# Recorded output: 'aYUEH828Fx7mU1vORe_Thg**'
# Index-based access is kept because the container type of the unpickled
# object is not visible here.
for i in range(len(conversations_obj_bot_a)):
    try:
        doc_psid = conversations_obj_bot_a[i]['document']['DialogState']['dialogStack']['$values'][0]['state']['foreach']['value']['id']
    except (LookupError, TypeError):
        # LookupError: a missing key or an empty '$values' list;
        # TypeError: an intermediate level that is not subscriptable.
        doc_psid = 0
    list_a_psids.append(doc_psid)
len(list_a_psids)
# Recorded output: 713
# Collect, for every bot-A conversation, the tail of 'realId' starting at the
# substring "3pI"; the integer 0 marks conversations without such a substring.
list_a_psids_realid = []
print(list_a_psids_realid)
# Recorded output: []
conversations_obj_bot_a[457]['realId']
# Recorded output: 'directlinespeech/conversations/c5bd6d75-c42a-4d17-a4b1-8a174b075970'
for i in range(len(conversations_obj_bot_a)):
    try:
        real_id = conversations_obj_bot_a[i]['realId']
        index_id = real_id.find("3pI")
    except (LookupError, TypeError, AttributeError):
        # No 'realId' entry, or a value that does not support str.find.
        list_a_psids_realid.append(0)
        continue
    # str.find returns -1 when the marker is absent. Testing against -1 also
    # accepts a match at position 0, which the previous `> 0` test discarded.
    list_a_psids_realid.append(real_id[index_id:] if index_id != -1 else 0)
len(list_a_psids_realid)
# Recorded output: 713
# Collect document['timeBeginning'] for every bot-A conversation; the integer 0
# marks conversations whose document has no such entry.
list_a_time_start = []
print(list_a_time_start)
# Recorded output: []
conversations_obj_bot_a[459]['document']
# Recorded output:
# {'$type': 'System.Collections.Generic.Dictionary`2[[System.String, System.Private.CoreLib],[System.Object, System.Private.CoreLib]], System.Private.CoreLib',
# 'timeBeginning': '2021-09-09T04:27:05.01Z',
# 'isRandom': '0',
# 'bot': 'B',
# 'prosody': '7%'}
for i in range(len(conversations_obj_bot_a)):
    try:
        started_at = conversations_obj_bot_a[i]['document']['timeBeginning']
    except (LookupError, TypeError):
        started_at = 0
    list_a_time_start.append(started_at)
len(list_a_time_start)
# Recorded output: 713
# Label every bot-A conversation from document['isRandom']: '0' -> 'A', any
# other value -> 'B'. The integer 0 marks conversations without that entry.
# NOTE(review): the sample document dumped for conversation 459 carries
# 'bot': 'B' together with 'isRandom': '0', which this mapping labels 'A' --
# confirm which field is authoritative.
list_a_bot = []
print(list_a_bot)
# Recorded output: []
conversations_obj_bot_a[567]['document']['isRandom']
# Recorded output: '0'
for i in range(len(conversations_obj_bot_a)):
    try:
        is_random = conversations_obj_bot_a[i]['document']['isRandom']
    except (LookupError, TypeError):
        list_a_bot.append(0)
    else:
        list_a_bot.append('A' if is_random == '0' else 'B')
len(list_a_bot)
# Recorded output: 713
# Collect document['timeFinished'] for every bot-A conversation; the integer 0
# marks conversations whose document has no such entry.
list_a_time_finish = []
print(list_a_time_finish)
# Recorded output: []
conversations_obj_bot_a[571]['document']['timeFinished']
# Recorded output: '2021-09-10T08:57:22.197Z'
for i in range(len(conversations_obj_bot_a)):
    try:
        finished_at = conversations_obj_bot_a[i]['document']['timeFinished']
    except (LookupError, TypeError):
        finished_at = 0
    list_a_time_finish.append(finished_at)
len(list_a_time_finish)
# Recorded output: 713
# Collect the top-level '_ts' value of every bot-A conversation; the integer 0
# marks conversations without that entry.
list_a_time_last = []
print(list_a_time_last)
# Recorded output: []
conversations_obj_bot_a[571]['_ts']
# Recorded output: 1631264242
for i in range(len(conversations_obj_bot_a)):
    try:
        last_write = conversations_obj_bot_a[i]['_ts']
    except (LookupError, TypeError):
        last_write = 0
    list_a_time_last.append(last_write)
len(list_a_time_last)
# Recorded output: 713
# Zip the six per-conversation lists for bot A into one row per conversation.
df_bot_a = pd.DataFrame(
    list(
        zip(
            list_a_psids,
            list_a_psids_realid,
            list_a_bot,
            list_a_time_start,
            list_a_time_finish,
            list_a_time_last,
        )
    ),
    columns=['psid_doc', 'psid_realid', 'bot', 'time_start', 'time_finish', 'time_state'],
)
df_bot_a.count()
# Recorded output: psid_doc 713 psid_realid 713 bot 713 time_start 713 time_finish 713 time_state 713 dtype: int64
# Tag every row with the store it was read from.
df_bot_a["Bot_BD"] = "A"
# fromtimestamp without a tz argument yields naive local time on the machine
# running this. NOTE(review): the document timestamps end in 'Z' -- confirm
# whether UTC was intended here.
df_bot_a['timestamp'] = df_bot_a["time_state"].map(datetime.datetime.fromtimestamp)
# 1 when time_state is later than epoch 1631178000 (2021-09-09 09:00:00 UTC), else 0.
df_bot_a['timeframe'] = df_bot_a['time_state'].astype('int').gt(1631178000) * 1
df_bot_a['timeframe'].value_counts()
# Recorded output: 0 479 1 234 Name: timeframe, dtype: int64
# df_bot_b = pd.read_csv("/Users/claudavila/Documents/Clau/3. Academic-Work/2.Laboral/1.AI/Sociate.ai/Demo/demo-anya/sociate-data-anya/conversations_bot_b.csv")
# df_bot_b.head()
# len(df_bot_b)
# df_bot_b['psid_realid'] = df_bot_b[df_bot_b['realId'].str.contains("3pI")]['realId'].str.slice(start=23)
# Load the pickled bot-B conversation objects.
# NOTE(review): pickle.load can execute code embedded in the file; only load
# pickles that come from a trusted export.
# NOTE(review): the absolute, user-specific path makes this step non-portable.
with open('/Users/claudavila/Documents/Clau/3. Academic-Work/2.Laboral/1.AI/Sociate.ai/Demo/demo-anya/sociate-data-anya/conversations_bot_b.pickle', 'rb') as handle:
    conversations_obj_bot_b = pickle.load(handle)
len(conversations_obj_bot_b)
# Recorded output: 295
# Collect, for every bot-B conversation, the id stored under
# document -> DialogState -> dialogStack -> $values[0] -> state -> foreach -> value -> id.
# Conversations where that path cannot be followed get the integer sentinel 0.
list_b_psids = []
print(list_b_psids)
# Recorded output: []
conversations_obj_bot_b[269]['document']['DialogState']['dialogStack']['$values'][0]['state']['foreach']['value']['id']
# Recorded output: '3pIXTHtOAS6PbK33y5yu9Q**'
# Index-based access is kept because the container type of the unpickled
# object is not visible here.
for i in range(len(conversations_obj_bot_b)):
    try:
        doc_psid = conversations_obj_bot_b[i]['document']['DialogState']['dialogStack']['$values'][0]['state']['foreach']['value']['id']
    except (LookupError, TypeError):
        # LookupError: a missing key or an empty '$values' list;
        # TypeError: an intermediate level that is not subscriptable.
        doc_psid = 0
    list_b_psids.append(doc_psid)
len(list_b_psids)
# Recorded output: 295
# Collect, for every bot-B conversation, the tail of 'realId' starting at the
# substring "3pI"; the integer 0 marks conversations without such a substring.
list_b_psids_realid = []
print(list_b_psids_realid)
# Recorded output: []
conversations_obj_bot_b[245]['realId']
# Recorded output: 'directlinespeech/conversations/2d5c77c9-3afe-4062-ab17-0758c2c86164'
for i in range(len(conversations_obj_bot_b)):
    try:
        real_id = conversations_obj_bot_b[i]['realId']
        index_id = real_id.find("3pI")
    except (LookupError, TypeError, AttributeError):
        # No 'realId' entry, or a value that does not support str.find.
        list_b_psids_realid.append(0)
        continue
    # str.find returns -1 when the marker is absent. Testing against -1 also
    # accepts a match at position 0, which the previous `> 0` test discarded.
    list_b_psids_realid.append(real_id[index_id:] if index_id != -1 else 0)
len(list_b_psids_realid)
# Recorded output: 295
# Collect document['timeBeginning'] for every bot-B conversation; the integer 0
# marks conversations whose document has no such entry (the sample dumped
# below is one of them).
list_b_time_start = []
print(list_b_time_start)
# Recorded output: []
conversations_obj_bot_b[245]['document']
# Recorded output:
# {'$type': 'System.Collections.Generic.Dictionary`2[[System.String, System.Private.CoreLib],[System.Object, System.Private.CoreLib]], System.Private.CoreLib',
# 'DialogState': {'$type': 'Microsoft.Bot.Builder.Dialogs.DialogState, Microsoft.Bot.Builder.Dialogs',
# 'dialogStack': {'$type': 'System.Collections.Generic.List`1[[Microsoft.Bot.Builder.Dialogs.DialogInstance, Microsoft.Bot.Builder.Dialogs]], System.Private.CoreLib',
# '$values': []}}}
for i in range(len(conversations_obj_bot_b)):
    try:
        started_at = conversations_obj_bot_b[i]['document']['timeBeginning']
    except (LookupError, TypeError):
        started_at = 0
    list_b_time_start.append(started_at)
len(list_b_time_start)
# Recorded output: 295
# Label every bot-B conversation from document['isRandom']: '0' -> 'A', any
# other value -> 'B'. The integer 0 marks conversations without that entry.
list_b_bot = []
print(list_b_bot)
# Recorded output: []
conversations_obj_bot_b[234]['document']['isRandom']
# Recorded output: '1'
for i in range(len(conversations_obj_bot_b)):
    try:
        is_random = conversations_obj_bot_b[i]['document']['isRandom']
    except (LookupError, TypeError):
        list_b_bot.append(0)
    else:
        list_b_bot.append('A' if is_random == '0' else 'B')
len(list_b_bot)
# Recorded output: 295
# Collect document['timeFinished'] for every bot-B conversation; the integer 0
# marks conversations whose document has no such entry.
list_b_time_finish = []
print(list_b_time_finish)
# Recorded output: []
conversations_obj_bot_b[234]['document']['timeFinished']
# Recorded output: '2021-09-10T12:20:21.893Z'
for i in range(len(conversations_obj_bot_b)):
    try:
        finished_at = conversations_obj_bot_b[i]['document']['timeFinished']
    except (LookupError, TypeError):
        finished_at = 0
    list_b_time_finish.append(finished_at)
len(list_b_time_finish)
# Recorded output: 295
# Collect the top-level '_ts' value of every bot-B conversation; the integer 0
# marks conversations without that entry.
list_b_time_last = []
print(list_b_time_last)
# Recorded output: []
conversations_obj_bot_b[145]['_ts']
# Recorded output: 1631263991
for i in range(len(conversations_obj_bot_b)):
    try:
        last_write = conversations_obj_bot_b[i]['_ts']
    except (LookupError, TypeError):
        last_write = 0
    list_b_time_last.append(last_write)
len(list_b_time_last)
# Recorded output: 295
# Zip the six per-conversation lists for bot B into one row per conversation.
df_bot_b = pd.DataFrame(
    list(
        zip(
            list_b_psids,
            list_b_psids_realid,
            list_b_bot,
            list_b_time_start,
            list_b_time_finish,
            list_b_time_last,
        )
    ),
    columns=['psid_doc', 'psid_realid', 'bot', 'time_start', 'time_finish', 'time_state'],
)
df_bot_b.count()
# Recorded output: psid_doc 295 psid_realid 295 bot 295 time_start 295 time_finish 295 time_state 295 dtype: int64
# Tag every row with the store it was read from.
df_bot_b["Bot_BD"] = "B"
# fromtimestamp without a tz argument yields naive local time on the machine
# running this. NOTE(review): the document timestamps end in 'Z' -- confirm
# whether UTC was intended here.
df_bot_b['timestamp'] = df_bot_b["time_state"].map(datetime.datetime.fromtimestamp)
# 1 when time_state is later than epoch 1631178000 (2021-09-09 09:00:00 UTC), else 0.
df_bot_b['timeframe'] = df_bot_b['time_state'].astype('int').gt(1631178000) * 1
df_bot_b['timeframe'].value_counts()
# Recorded output: 1 269 0 26 Name: timeframe, dtype: int64
# Stack the bot-B and bot-A conversation frames (original row labels are kept).
df_conv = pd.concat([df_bot_b, df_bot_a])
df_conv.count()
# Recorded output: psid_doc 1008 psid_realid 1008 bot 1008 time_start 1008 time_finish 1008 time_state 1008 Bot_BD 1008 timestamp 1008 timeframe 1008 dtype: int64
# Prefer the id parsed from realId and fall back to the dialog-state id when
# realId gave nothing. The "nothing" sentinel written by the extraction loops
# is the integer 0; comparing against the string '0' never matched, so the
# fallback never fired.
# NOTE: the value_counts listing recorded in this file predates this fix.
df_conv['psid'] = np.where(df_conv['psid_realid'] == 0, df_conv['psid_doc'], df_conv['psid_realid'])
df_conv['psid'].value_counts()
0 801
3pIXTHtOAS4A1MB_Zebn7w** 1
3pIXTHtOAS7EfDCN_weuYA** 1
3pIXTHtOAS5g6IyQ9Bl5Sw** 1
3pIXTHtOAS4a9rKudjthKQ** 1
...
3pIXTHtOAS4Y3fyyGypizQ** 1
3pIXTHtOAS7DMDpIV2WrMw** 1
3pIXTHtOAS6PbZrsFqkfqw** 1
3pIXTHtOAS5lfWahc6dgPQ** 1
3pIXTHtOAS5mhsZ7kjSYnw** 1
Name: psid, Length: 208, dtype: int64
# Outer-join survey responses with conversations on 'psid', the only column the
# two frames share, so rows unmatched on either side are kept.
df_final = df_psid.merge(df_conv, how='outer', on='psid')
df_final.count()
# 206 How much did you enjoy playing with Anya? 206 How annoyed did you feel with Anya? 206 How smart did you think Anya was? 206 Would you recommend this game to a friend?\n 206 Did you like Anya? 206 How frustrated did Anya make you feel? 206 Do you think Anya improved during the conversation? 206 Did the conversation get easier over time? 206 Did you feel like you were helping Anya with more clues? 206 What did you like most about Anya? 203 What would you change about Anya to make her more enjoyable to play with? 205 Anything else you would like to share? 200 What is your Gender? 206 What is your age group? 206 psid 1034 Start Date (UTC) 206 Submit Date (UTC) 206 Network ID 206 psid_doc 1010 psid_realid 1010 bot 1010 time_start 1010 time_finish 1010 time_state 1010 Bot_BD 1010 timestamp 1010 timeframe 1010 dtype: int64
df_final.columns
Index(['#', 'How much did you enjoy playing with Anya? ',
'How annoyed did you feel with Anya?',
'How smart did you think Anya was?',
'Would you recommend this game to a friend?\n', 'Did you like Anya?',
'How frustrated did Anya make you feel?',
'Do you think Anya improved during the conversation?',
'Did the conversation get easier over time?',
'Did you feel like you were helping Anya with more clues?',
'What did you like most about Anya?',
'What would you change about Anya to make her more enjoyable to play with?',
'Anything else you would like to share?', 'What is your Gender?',
'What is your age group?', 'psid', 'Start Date (UTC)',
'Submit Date (UTC)', 'Network ID', 'psid_doc', 'psid_realid', 'bot',
'time_start', 'time_finish', 'time_state', 'Bot_BD', 'timestamp',
'timeframe'],
dtype='object')
# Survey question texts with the stray trailing space / newline of the raw
# export headers removed.
questions = [
    'How much did you enjoy playing with Anya?',
    'How annoyed did you feel with Anya?',
    'How smart did you think Anya was?',
    'Would you recommend this game to a friend?',
    'Did you like Anya?',
    'How frustrated did Anya make you feel?',
    'Do you think Anya improved during the conversation?',
    'Did the conversation get easier over time?',
    'Did you feel like you were helping Anya with more clues?',
    'What did you like most about Anya?',
    'What would you change about Anya to make her more enjoyable to play with?',
    'Anything else you would like to share?',
    'What is your Gender?',
    'What is your age group?',
]
# Full replacement header for the merged frame: the '#' column, the questions,
# the survey metadata columns, then the conversation columns.
new_cols = ['#'] + questions + [
    'psid',
    'Start Date (UTC)',
    'Submit Date (UTC)',
    'Network ID',
    'psid_doc',
    'psid_realid',
    'bot',
    'time_start',
    'time_finish',
    'time_state',
    'Bot_BD',
    'timestamp',
    'timeframe',
]
# Replace the header positionally with the cleaned names.
df_final.columns = new_cols
# Drop rows whose psid is the integer 0.
df_id = df_final[df_final['psid'].ne(0)]
# Drop the one id that is also the value shown for bot-A conversation 457's
# dialog state. NOTE(review): presumably a test account -- confirm.
df_id_prod = df_id[df_id['psid'].ne('aYUEH828Fx7mU1vORe_Thg**')]
df_id_prod['timeframe'].value_counts()
# Recorded output: 1.0 209 Name: timeframe, dtype: int64
df_id_prod['bot'].value_counts()
# Recorded output: B 107 A 102 Name: bot, dtype: int64
df_id_prod['Bot_BD'].value_counts()
# Recorded output: B 107 A 102 Name: Bot_BD, dtype: int64
df_id_prod.count()
# 205 How much did you enjoy playing with Anya? 205 How annoyed did you feel with Anya? 205 How smart did you think Anya was? 205 Would you recommend this game to a friend? 205 Did you like Anya? 205 How frustrated did Anya make you feel? 205 Do you think Anya improved during the conversation? 205 Did the conversation get easier over time? 205 Did you feel like you were helping Anya with more clues? 205 What did you like most about Anya? 202 What would you change about Anya to make her more enjoyable to play with? 204 Anything else you would like to share? 199 What is your Gender? 205 What is your age group? 205 psid 232 Start Date (UTC) 205 Submit Date (UTC) 205 Network ID 205 psid_doc 209 psid_realid 209 bot 209 time_start 209 time_finish 209 time_state 209 Bot_BD 209 timestamp 209 timeframe 209 dtype: int64
# Keep one row per psid, preferring the last occurrence in df_id_prod's row order.
df_total = df_id_prod.drop_duplicates(subset=['psid'], keep='last')
# df_total is already unique on psid, so repeating the identical
# drop_duplicates call was a no-op; an independent copy yields the same frame.
# NOTE(review): if a different de-duplication key was intended here, apply it instead.
df_unique = df_total.copy()
df_unique.count()
# 203 How much did you enjoy playing with Anya? 203 How annoyed did you feel with Anya? 203 How smart did you think Anya was? 203 Would you recommend this game to a friend? 203 Did you like Anya? 203 How frustrated did Anya make you feel? 203 Do you think Anya improved during the conversation? 203 Did the conversation get easier over time? 203 Did you feel like you were helping Anya with more clues? 203 What did you like most about Anya? 200 What would you change about Anya to make her more enjoyable to play with? 202 Anything else you would like to share? 197 What is your Gender? 203 What is your age group? 203 psid 230 Start Date (UTC) 203 Submit Date (UTC) 203 Network ID 203 psid_doc 207 psid_realid 207 bot 207 time_start 207 time_finish 207 time_state 207 Bot_BD 207 timestamp 207 timeframe 207 dtype: int64
# Number of de-duplicated rows per bot label.
df_unique.loc[:, 'bot'].value_counts()
# Recorded output: B 107 A 100 Name: bot, dtype: int64
# Gender counts within each bot label.
df_unique.groupby('bot')['What is your Gender?'].value_counts()
# Recorded output:
# bot What is your Gender?
# A Male 54
# Female 33
# B Male 58
# Female 35
# Name: What is your Gender?, dtype: int64
# Age-group counts within each bot label.
df_unique.groupby('bot')['What is your age group?'].value_counts()
# Recorded output:
# bot What is your age group?
# A 31-35 33
# 36-40 25
# 26-30 16
# 20-25 11
# Over 40 2
# B 31-35 32
# 36-40 30
# 26-30 18
# 20-25 11
# Over 40 1
# Under 20 1
# Name: What is your age group?, dtype: int64
# Print, for each question, the share of every answer within each Bot_BD group
# as a percentage string rounded to one decimal.
for question in questions:
    shares = df_unique.groupby(['Bot_BD'])[question].value_counts(normalize=True)
    print(shares.mul(100).round(1).astype(str) + '%')
Bot_BD How much did you enjoy playing with Anya?
A 4 - It was good 34.5%
5 - It was fantastic 28.7%
3 - It was ok 25.3%
2 - It was bad 6.9%
1 - It was awful 4.6%
B 5 - It was fantastic 38.7%
4 - It was good 35.5%
3 - It was ok 12.9%
2 - It was bad 7.5%
1 - It was awful 5.4%
Name: How much did you enjoy playing with Anya?, dtype: object
Bot_BD How annoyed did you feel with Anya?
A 3 - It was ok 27.6%
4- It wasn't annoying at all 23.0%
5 - It was fun 23.0%
2 - It was somewhat annoying 20.7%
1 - It was very annoying 5.7%
B 5 - It was fun 38.7%
3 - It was ok 20.4%
4- It wasn't annoying at all 19.4%
1 - It was very annoying 10.8%
2 - It was somewhat annoying 10.8%
Name: How annoyed did you feel with Anya?, dtype: object
Bot_BD How smart did you think Anya was?
A 3 - Anya was smart 40.2%
2 - It felt like Anya was learning 27.6%
4 - Anya was very clever 25.3%
1 - Anya was stupid 6.9%
B 3 - Anya was smart 38.7%
4 - Anya was very clever 31.2%
2 - It felt like Anya was learning 21.5%
1 - Anya was stupid 8.6%
Name: How smart did you think Anya was?, dtype: object
Bot_BD Would you recommend this game to a friend?
A Yes 56.3%
No 26.4%
Indifferent 17.2%
B Yes 64.5%
No 28.0%
Indifferent 7.5%
Name: Would you recommend this game to a friend?, dtype: object
Bot_BD Did you like Anya?
A Yes 64.4%
No 18.4%
Indifferent 17.2%
B Yes 66.7%
No 21.5%
Indifferent 11.8%
Name: Did you like Anya?, dtype: object
Bot_BD How frustrated did Anya make you feel?
A 5 - It was fantastic 25.3%
3 - It was ok 24.1%
2 - It was somewhat frustrating 21.8%
4 - It was fine 21.8%
1 - It was very frustrating 6.9%
B 5 - It was fantastic 38.7%
4 - It was fine 24.7%
3 - It was ok 17.2%
2 - It was somewhat frustrating 11.8%
1 - It was very frustrating 7.5%
Name: How frustrated did Anya make you feel?, dtype: object
Bot_BD Do you think Anya improved during the conversation?
A Yes 48.3%
No 29.9%
Indifferent 21.8%
B Yes 62.4%
No 30.1%
Indifferent 7.5%
Name: Do you think Anya improved during the conversation?, dtype: object
Bot_BD Did the conversation get easier over time?
A Yes 51.7%
No 31.0%
Indifferent 17.2%
B Yes 59.1%
No 31.2%
Indifferent 9.7%
Name: Did the conversation get easier over time?, dtype: object
Bot_BD Did you feel like you were helping Anya with more clues?
A Yes 64.4%
No 20.7%
Indifferent 14.9%
B Yes 74.2%
No 24.7%
Indifferent 1.1%
Name: Did you feel like you were helping Anya with more clues?, dtype: object
Bot_BD What did you like most about Anya?
A nothing 3.5%
amazing 2.4%
good 2.4%
not sure 2.4%
yes 2.4%
...
B very good 1.1%
very likely I love it so.. 1.1%
very nice 1.1%
voice 1.1%
well it nice 1.1%
Name: What did you like most about Anya?, Length: 163, dtype: object
Bot_BD What would you change about Anya to make her more enjoyable to play with?
A nothing 15.1%
not sure 3.5%
amazing 2.3%
. 1.2%
Anya video is very enjoyable 1.2%
...
B to improve the tone of her voice/way of speaking to make it seem more natural and easy to understand, and to make her more intuitive to guess the right animal 1.1%
very funny 1.1%
voice 1.1%
xxxxxxxxxxxxxxxxxxxxxxgggg 1.1%
yes very good 1.1%
Name: What would you change about Anya to make her more enjoyable to play with?, Length: 149, dtype: object
Bot_BD Anything else you would like to share?
A no 28.0%
No 6.1%
nothing 4.9%
nothing else 4.9%
Nothing 2.4%
...
B trust 1.1%
very good 1.1%
vits very good 1.1%
well it good 1.1%
yes . voice is good 1.1%
Name: Anything else you would like to share?, Length: 103, dtype: object
Bot_BD What is your Gender?
A Male 62.1%
Female 37.9%
B Male 62.4%
Female 37.6%
Name: What is your Gender?, dtype: object
Bot_BD What is your age group?
A 31-35 37.9%
36-40 28.7%
26-30 18.4%
20-25 12.6%
Over 40 2.3%
B 31-35 34.4%
36-40 32.3%
26-30 19.4%
20-25 11.8%
Over 40 1.1%
Under 20 1.1%
Name: What is your age group?, dtype: object
# Age-group answer counts per bot label.
df_group = df_unique.groupby('bot')['What is your age group?'].value_counts()
# Fixed-choice survey questions (the three free-text questions are left out).
closed_questions = [
    'How much did you enjoy playing with Anya?',
    'How annoyed did you feel with Anya?',
    'How smart did you think Anya was?',
    'Would you recommend this game to a friend?',
    'Did you like Anya?',
    'How frustrated did Anya make you feel?',
    'Do you think Anya improved during the conversation?',
    'Did the conversation get easier over time?',
    'Did you feel like you were helping Anya with more clues?',
    'What is your Gender?',
    'What is your age group?',
]